################################################
# Analysis for:
#
# Jordan, Soren. 2022. ``Extending Regression to Binary (and More!) Outcomes.'' In 
#  Teaching Graduate Political Methodology, eds. Mitchell Brown, 
#  Shane Nordyke, and Cameron G. Thies. Northampton, MA: Edward Elgar Publishing, pp. 169-181. 
#  DOI: 10.4337/9781800885288.00027
#
# Required files: None
#
################################################

library(tidyverse)
library(ggpubr)
library(nnet)

## Figure 16.1
# Create data
set.seed(1)
pos <- 1:100

x <- rnorm(100, 10, 3)
y <- rnorm(100, 2, 0.3)*x + rnorm(100, 0, 1)

the.break <- 20
to.zero <- sample(pos[y < the.break], (length(y[y < the.break]) - 4), replace = F) 
to.one <- sample(pos[y > the.break], (length(y[y > the.break]) - 4), replace = F) 

y[pos %in% to.zero] <- 0
y[pos %in% to.one] <- 1

y[y > the.break] <- 0
y[y < the.break & y != 0 & y != 1] <- 1

model <- glm(y ~ x, family = "binomial")
summary(model)

jitter.data <- data.frame(x = x, y = y)


x.plot <- seq((min(x) - 3), (max(x) + 3), 0.001)
plot.pos <- 1:length(x.plot)
pred.dat <- data.frame(x = x.plot)
y.plot <- predict(model, pred.dat)
y.trans <-  predict(model, pred.dat, type = "response")

breaks.at <- c(y.plot[1],
				y.plot[plot.pos[y.trans > 0.04990 & y.trans < 0.05010][1]],
				y.plot[plot.pos[y.trans > 0.09990 & y.trans < 0.10010][1]],
				y.plot[plot.pos[y.trans > 0.24990 & y.trans < 0.25010][1]],
				y.plot[plot.pos[y.trans > 0.39990 & y.trans < 0.40010][1]],
				y.plot[plot.pos[y.trans > 0.49990 & y.trans < 0.50010][1]],
				y.plot[plot.pos[y.trans > 0.59990 & y.trans < 0.60010][1]],
				y.plot[plot.pos[y.trans > 0.74990 & y.trans < 0.75010][1]],
				y.plot[plot.pos[y.trans > 0.89990 & y.trans < 0.90010][1]],
				y.plot[plot.pos[y.trans > 0.94990 & y.trans < 0.95010][1]],
				y.plot[length(y.plot)])

breaks.at.short <- c(y.plot[1],
				y.plot[plot.pos[y.trans > 0.09990 & y.trans < 0.10010][1]],
				y.plot[plot.pos[y.trans > 0.24990 & y.trans < 0.25010][1]],
				y.plot[plot.pos[y.trans > 0.49990 & y.trans < 0.50010][1]],
				y.plot[plot.pos[y.trans > 0.74990 & y.trans < 0.75010][1]],
				y.plot[plot.pos[y.trans > 0.89990 & y.trans < 0.90010][1]],
				y.plot[length(y.plot)])
				
gg.dat.xb <- data.frame(x = x.plot, y = y.plot)
gg.dat.trans <- data.frame(x = x.plot, y = y.trans)


p1 <- ggplot(data = gg.dat.xb, aes(x, y)) + 
	geom_line() +
	#scale_y_continuous(breaks = breaks.at, 
	#	labels = c(0.00, 0.05, 0.10, 0.25, 0.40, 0.50, 0.60, 0.75, 0.90, 0.95, 1.00)) + 
	theme_bw() +
	labs(x = "X Variable", y = "Y Response in\nUntransformed Space")
	
p2 <- ggplot(data = gg.dat.xb, aes(x, y)) + 
	geom_line() +
	scale_y_continuous(breaks = breaks.at.short, 
		labels = c(0.00, 0.10, 0.25, 0.50, 0.75, 0.90, 1.00)) + 
	theme_bw() +
	labs(x = "X Variable", y = "Y Response in\nProbability Space")
	
p3 <- ggplot(data = gg.dat.trans, aes(x, y)) + 
	geom_line() +
	#scale_y_continuous(breaks = breaks.at, 
	#	labels = c(0.00, 0.05, 0.10, 0.25, 0.40, 0.50, 0.60, 0.75, 0.90, 0.95, 1.00)) + 
	theme_bw() +
	geom_point(data = jitter.data, aes(x, y)) +
	labs(x = "X Variable", y = "Y Response in\nTransformed Probability\nSpace")


# pdf("/.../logit.pdf")
ggarrange(p1, p2, p3, ncol = 1, nrow = 3)
dev.off()


## Figure 16.2
x.plot <- seq((min(x) - 3), (max(x) + 3), 0.001)
plot.pos <- 1:length(x.plot)
pred.dat <- data.frame(x = x.plot)
y.plot <- predict(model, pred.dat)
y.trans <-  predict(model, pred.dat, type = "response")

gg.shift <- data.frame(X = c(rep(x.plot, 3)),
	Model = c(rep("Beta - 0.1", length(x.plot)), 
			rep("Intercept + 2.0", length(x.plot)), 
			rep("ML Estimates", length(x.plot))),
	Predictions = c(exp(coef(model)[1] + (coef(model)[2] - 0.1)*x.plot)/
						(1 + exp(coef(model)[1] + (coef(model)[2] - 0.1)*x.plot)),
					exp((coef(model)[1] + 2) + coef(model)[2]*x.plot)/
						(1 + exp((coef(model)[1] + 2) + coef(model)[2]*x.plot)),
					exp(coef(model)[1] + coef(model)[2]*x.plot)/
						(1 + exp(coef(model)[1] + coef(model)[2]*x.plot))))

gg.shift$Model <- factor(gg.shift$Model, levels = c("ML Estimates", "Beta - 0.1", "Intercept + 2.0"))

# pdf("/.../shifts.pdf")
ggplot(data = gg.shift, aes(x = X, y = Predictions)) +
	geom_line() + 
	theme_bw() +
	facet_grid(Model ~ .) +
	labs(x = "X Variable", y = "Predicted Probability") + 
	geom_point(data = jitter.data, aes(x, y)) 	
dev.off()




## Figure 16.3
# Create data
set.seed(2)
pos <- 1:100

x.new <- c(rnorm(50, 10, 3), rnorm(50, 40, 3))
y.new <- rnorm(100, 2, 0.3)*x.new + rnorm(100, 0, 1)

the.break <- 40
y.new[y.new < the.break] <- 0
y.new[y.new > the.break] <- 1

model.new <- glm(y.new ~ x.new, family = "binomial")
summary(model.new)

jitter.data.new <- data.frame(x = x.new, y = y.new)

x.plot <- seq((min(x.new) - 3), (max(x.new) + 3), 0.001)
plot.pos <- 1:length(x.plot)

gg.sep <- data.frame(X = c(rep(x.plot, 3)),
	Model = c(rep("Beta - 0.4", length(x.plot)), 
			rep("Intercept + 5.0", length(x.plot)), 
			rep("ML Estimates", length(x.plot))),
	Predictions = c(exp(coef(model.new)[1] + (coef(model.new)[2] - 0.4)*x.plot)/
						(1 + exp(coef(model.new)[1] + (coef(model.new)[2] - 0.4)*x.plot)),
					exp((coef(model.new)[1] + 5) + coef(model.new)[2]*x.plot)/
						(1 + exp((coef(model.new)[1] + 5) + coef(model.new)[2]*x.plot)),
					exp(coef(model.new)[1] + coef(model.new)[2]*x.plot)/
						(1 + exp(coef(model.new)[1] + coef(model.new)[2]*x.plot))))

# pdf("/.../sep.pdf")
ggplot(data = gg.sep, aes(x = X, y = Predictions)) + 
	geom_line(data = gg.sep, aes(lty = Model)) +
	geom_point(data = jitter.data.new, aes(x = x, y = y)) +
	theme_bw()
dev.off()







## Figure 16.4
# Create data
set.seed(1)
pos <- 1:100

x <- rnorm(100, 10, 3)

y.multi  <- rnorm(100, 2, 0.3)*x + rnorm(100, 0, 1)
y.letter <- rep(NA, length(y.multi))

the.break.1 <- 18
the.break.2 <- 24

to.A <- sample(pos[y.multi < the.break.1], (length(y.multi[y.multi < the.break.1]) - 8), replace = F) 
to.C <- sample(pos[y.multi > the.break.2], (length(y.multi[y.multi > the.break.2]) - 8), replace = F) 

y.letter[pos %in% to.A] <- "A"
y.letter[pos %in% to.C] <- "C"

to.B <- sample(pos[is.na(y.letter)], (length(pos[is.na(y.letter)]) - 10), replace = F)
y.letter[pos %in% to.B] <- "B"

for(i in 1:length(y.letter)) {
	if(is.na(y.letter[i])) {
		y.letter[i] <- sample(c("A", "B", "C"), 1)
	}
}

y.letter <- factor(y.letter)


model.multi <- multinom(y.letter ~ x)
summary(model.multi)


x.plot <- seq((min(x) - 3), (max(x) + 3), 0.001)
pred.dat.multi <- data.frame(x = x.plot)
y.trans.multi <-  predict(model.multi, pred.dat.multi, type = "probs")

gg.multi <- data.frame(X = c(rep(x.plot, 3)),
	Outcomes = c(rep("Pr(A)", length(x.plot)), 
			rep("Pr(B)", length(x.plot)), 
			rep("Pr(C)", length(x.plot))),
	Predictions = c(y.trans.multi[,1], y.trans.multi[,2], y.trans.multi[,3]))

# pdf("/Users/scj0014/Myfiles/Research/Teaching methods Mitchell/Logit/multi.pdf")
ggplot(data = gg.multi, aes(x = X, y = Predictions, lty = Outcomes)) + 
	geom_line() +
	#geom_point(data = jitter.data.new, aes(x = x, y = y)) +
	theme_bw() +
	geom_vline(xintercept = mean(x), lwd = 2) +
	geom_vline(xintercept = mean(x) + 1, lty = 2, lwd = 2) 
dev.off()


## Data for Table 16.1
x.plot.2 <- c(mean(x), mean(x) + 1)
pred.dat.multi.2 <- data.frame(x = x.plot.2)
y.trans.multi.2 <-  predict(model.multi, pred.dat.multi.2, type = "probs")
y.trans.multi.2 








